## Waiting for profiling to be done...
We’ll model the trailheads independently since they differ so much. There is a lot to unpack here, but one key observation: at Kebler, only high ratings lead to statistically significant drops in use, whereas at Slate there is a noticeable drop at considerable — and perhaps even at moderate — ratings.
# Negative-binomial regression of daily sled-user counts on the near-treeline
# avalanche rating, restricted to the Brush Creek Rd trailhead. rating_near
# enters as a factor (see rating_near2/3 contrasts below), so coefficients are
# log rate ratios relative to the lowest rating level.
# NOTE(review): theta = 0.0572 (below) indicates extreme overdispersion, so
# the rating contrasts here are weakly identified.
glm2brushrd <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Brush Creek Rd"),])
summary(glm2brushrd)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Brush Creek Rd"),
## ], init.theta = 0.05715315239, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4337 -0.4337 -0.3100 -0.3100 1.7695
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4307 0.4854 -2.947 0.0032 **
## rating_near2 -1.1550 0.6681 -1.729 0.0839 .
## rating_near3 -0.8718 0.7927 -1.100 0.2714
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.0572) family taken to be 1)
##
## Null deviance: 61.373 on 307 degrees of freedom
## Residual deviance: 58.068 on 305 degrees of freedom
## (80 observations deleted due to missingness)
## AIC: 202.31
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.0572
## Std. Err.: 0.0208
##
## 2 x log-likelihood: -194.3100
# Same NB model for sled users at the Brush Creek Trailhead.
# NOTE(review): rating_near4 has estimate -17.6 with SE ~2108 — a sign of
# quasi-complete separation (presumably no/near-zero sled use was observed
# at rating 4), so that contrast is not interpretable; confirm the raw counts.
glm2brushth <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Brush Creek Trailhead"),])
summary(glm2brushth)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Brush Creek Trailhead"),
## ], init.theta = 0.0557544471, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4045 -0.2330 -0.2256 -0.2256 2.3487
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6818 0.7363 -2.284 0.0224 *
## rating_near2 -1.7522 0.9262 -1.892 0.0585 .
## rating_near3 -1.6716 0.9314 -1.795 0.0727 .
## rating_near4 -17.6208 2107.8559 -0.008 0.9933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.0558) family taken to be 1)
##
## Null deviance: 47.436 on 360 degrees of freedom
## Residual deviance: 41.153 on 357 degrees of freedom
## (103 observations deleted due to missingness)
## AIC: 128.39
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.0558
## Std. Err.: 0.0325
##
## 2 x log-likelihood: -118.3850
# Same NB model for sled users on Gothic Rd. No evident rating effect at
# levels 2-3; rating_near4 again shows the huge-SE separation pattern
# (estimate -18.1, SE ~2108), so no conclusion can be drawn for rating 4.
glm2gothic <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Gothic Rd"),])
summary(glm2gothic)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Gothic Rd"), ], init.theta = 0.1743449277,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5988 -0.5916 -0.5659 -0.5659 3.0254
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.19942 0.24902 -4.816 1.46e-06 ***
## rating_near2 -0.13809 0.30305 -0.456 0.649
## rating_near3 0.03846 0.33328 0.115 0.908
## rating_near4 -18.10317 2107.85569 -0.009 0.993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1743) family taken to be 1)
##
## Null deviance: 289.12 on 667 degrees of freedom
## Residual deviance: 281.96 on 664 degrees of freedom
## (108 observations deleted due to missingness)
## AIC: 820.91
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1743
## Std. Err.: 0.0308
##
## 2 x log-likelihood: -810.9120
# Same NB model for sled users at Cement Creek. Only the rating-4 contrast
# approaches significance (p = 0.081), with a negative (reduced-use) sign.
glm2cement <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Cement Creek"),])
summary(glm2cement)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Cement Creek"), ],
## init.theta = 0.4095885141, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2289 -1.1876 -1.0462 0.2969 1.9243
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.6322 0.1644 3.846 0.00012 ***
## rating_near2 0.1466 0.1939 0.756 0.44985
## rating_near3 -0.1916 0.2088 -0.917 0.35900
## rating_near4 -0.7948 0.4560 -1.743 0.08134 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.4096) family taken to be 1)
##
## Null deviance: 556.78 on 589 degrees of freedom
## Residual deviance: 549.43 on 586 degrees of freedom
## (44 observations deleted due to missingness)
## AIC: 2107.4
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.4096
## Std. Err.: 0.0388
##
## 2 x log-likelihood: -2097.3750
# Same NB model for sled users at Kebler. Consistent with the note at the
# top of the file: only the high rating (rating_near4, p = 0.0047) is
# associated with a statistically significant drop in use.
glm2kebler <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Kebler"),])
summary(glm2kebler)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Kebler"), ], init.theta = 0.471224509,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0347 -1.1675 -0.5590 0.3043 1.8860
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.6281 0.1355 26.772 < 2e-16 ***
## rating_near2 -0.1710 0.1592 -1.074 0.28283
## rating_near3 -0.1107 0.1753 -0.632 0.52761
## rating_near4 -1.0589 0.3749 -2.825 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.4712) family taken to be 1)
##
## Null deviance: 745.29 on 616 degrees of freedom
## Residual deviance: 738.68 on 613 degrees of freedom
## (49 observations deleted due to missingness)
## AIC: 5341.2
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.4712
## Std. Err.: 0.0260
##
## 2 x log-likelihood: -5331.1690
# Same NB model for sled users on Slate River Rd. Significant drops appear
# at rating 3 (p = 0.049) and especially rating 4 (p = 3.4e-09), with a
# marginal drop at rating 2 (p = 0.093) — the "considerable and maybe even
# moderate" pattern mentioned at the top of the file.
glm2slate <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Slate River Rd"),])
summary(glm2slate)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Slate River Rd"),
## ], init.theta = 0.7680022861, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8372 -0.9813 -0.3281 0.2923 4.0188
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8156 0.1030 17.623 < 2e-16 ***
## rating_near2 -0.2056 0.1225 -1.678 0.0933 .
## rating_near3 -0.2672 0.1354 -1.973 0.0485 *
## rating_near4 -2.4135 0.4082 -5.912 3.37e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.768) family taken to be 1)
##
## Null deviance: 822.13 on 695 degrees of freedom
## Residual deviance: 788.17 on 692 degrees of freedom
## (116 observations deleted due to missingness)
## AIC: 3721.1
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.7680
## Std. Err.: 0.0523
##
## 2 x log-likelihood: -3711.1070
# Same NB model for sled users at Snodgrass. No rating contrast reaches
# significance at this trailhead.
glm2snodgrass <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Snodgrass"),])
summary(glm2snodgrass)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Snodgrass"), ], init.theta = 0.1122561156,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6902 -0.6424 -0.6090 -0.6090 1.7293
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1927 0.3230 -0.597 0.551
## rating_near2 -0.5550 0.3913 -1.418 0.156
## rating_near3 -0.3293 0.4183 -0.787 0.431
## rating_near4 -1.4168 0.8943 -1.584 0.113
##
## (Dispersion parameter for Negative Binomial(0.1123) family taken to be 1)
##
## Null deviance: 219.97 on 492 degrees of freedom
## Residual deviance: 216.45 on 489 degrees of freedom
## (283 observations deleted due to missingness)
## AIC: 820.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1123
## Std. Err.: 0.0171
##
## 2 x log-likelihood: -810.8970
# Same NB model for sled users at Washington Gulch. Only the high rating
# shows a (large and highly significant) drop: rating_near4 = -2.55,
# p = 2.4e-06.
glm2washington <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled=="sled")&(winter_travel$Trailhead=="Washington Gulch"),])
summary(glm2washington)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## "sled") & (winter_travel$Trailhead == "Washington Gulch"),
## ], init.theta = 0.6459698993, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5790 -1.5046 -0.3148 0.3216 3.0782
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.16315 0.11963 9.723 < 2e-16 ***
## rating_near2 0.17273 0.14265 1.211 0.226
## rating_near3 -0.03829 0.15580 -0.246 0.806
## rating_near4 -2.54945 0.54011 -4.720 2.36e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.646) family taken to be 1)
##
## Null deviance: 721.84 on 637 degrees of freedom
## Residual deviance: 690.61 on 634 degrees of freedom
## (170 observations deleted due to missingness)
## AIC: 2932.6
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.6460
## Std. Err.: 0.0509
##
## 2 x log-likelihood: -2922.5580
# Distribution of daily user counts by year, faceted by travel modality.
# Fix: the original drew the (opaque) boxplot AFTER the jittered points,
# hiding them; the boxplot's own outlier points also duplicated the raw
# data. Draw the boxplot first with outliers suppressed, then overlay the
# jittered counts so every observation stays visible.
ggplot(winter_travel, aes(year, user.count))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
facet_grid(~modality)+
scale_y_log10()
# Total users per day across all trailheads, sorted busiest-day-first.
# week/weekend are carried through the grouping so they survive summarise().
# NOTE(review): summarise() drops only the last grouping variable, so the
# result stays grouped by date and week (hence the message below). Adding
# .groups = "drop" would silence it; left as-is so the code matches the
# printed output.
day_totals <- all_users %>%
group_by(date, week, weekend) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` has grouped output by 'date', 'week'. You can override using the `.groups` argument.
# Ten busiest days overall — dominated by weekends in peak winter.
head(day_totals, 10)
## # A tibble: 10 x 4
## # Groups: date, week [10]
## date week weekend users
## <chr> <chr> <chr> <int>
## 1 2018-02-17 Sat weekend 819
## 2 2018-02-18 Sun weekend 744
## 3 2018-03-03 Sat weekend 665
## 4 2018-01-27 Sat weekend 659
## 5 2018-12-29 Sat weekend 623
## 6 2018-03-26 Mon weekday 586
## 7 2020-02-29 Sat weekend 573
## 8 2018-12-30 Sun weekend 568
## 9 2019-02-23 Sat weekend 568
## 10 2018-03-11 Sun weekend 544
# Daily totals on a "season day" axis: (yday + 30) %% 365 maps Dec 1 to 0
# (335 + 30 mod 365), Jan 1 to 31, Feb 1 to 62, Mar 1 to 90, Apr 1 to 121.
# Fix: the Apr 1 break was 120, one day off (non-leap anchors; in leap years
# the later anchors drift by one day, negligible at this resolution).
ggplot(day_totals, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,121),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
labs(x="")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
There is a jump in usage in the last week of the calendar year; otherwise usage increases gradually until around mid-March.
# Daily totals split by modality and sled status (per all_locations rows),
# sorted busiest-first.
# NOTE(review): as above, summarise() leaves the result grouped by all but
# the last grouping variable; .groups = "drop" would silence the message.
day_totals_modes <- all_locations %>%
group_by(date, week, weekend, modality, has_sled) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` has grouped output by 'date', 'week', 'weekend', 'modality'. You can override using the `.groups` argument.
# Ten busiest day/modality combinations — all non-motorized, no-sled days.
head(day_totals_modes, 10)
## # A tibble: 10 x 6
## # Groups: date, week, weekend, modality [10]
## date week weekend modality has_sled users
## <chr> <chr> <chr> <chr> <chr> <int>
## 1 2018-01-27 Sat weekend Non.motorized no_sled 601
## 2 2018-02-17 Sat weekend Non.motorized no_sled 557
## 3 2018-02-18 Sun weekend Non.motorized no_sled 514
## 4 2018-03-03 Sat weekend Non.motorized no_sled 468
## 5 2019-02-23 Sat weekend Non.motorized no_sled 445
## 6 2018-03-21 Wed weekday Non.motorized no_sled 387
## 7 2020-02-29 Sat weekend Non.motorized no_sled 370
## 8 2017-12-30 Sat weekend Non.motorized no_sled 367
## 9 2018-03-26 Mon weekday Non.motorized no_sled 356
## 10 2020-03-15 Sun weekend Non.motorized no_sled 345
# Seasonal trend per modality on the shifted day-of-year axis (Dec 1 -> 0).
# Fixes: Apr 1 break corrected from 120 to 121 (yday 91 + 30, non-leap),
# and facet_grid(modality~1) replaced with the idiomatic modality ~ .,
# which avoids a spurious "1" column strip label.
ggplot(day_totals_modes, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,121),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(modality~.)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Daily totals split by sled status only (modality collapsed), busiest-first.
# NOTE(review): result remains grouped by date/week/weekend (see message);
# .groups = "drop" would silence it.
day_totals_modes2 <- all_locations %>%
group_by(date, week, weekend, has_sled) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` has grouped output by 'date', 'week', 'weekend'. You can override using the `.groups` argument.
# Seasonal trend by sled status on the shifted day-of-year axis (Dec 1 -> 0).
# Fixes: Apr 1 break corrected from 120 to 121 (yday 91 + 30, non-leap),
# and facet_grid(has_sled~1) replaced with has_sled ~ . to avoid a spurious
# "1" column strip label.
ggplot(day_totals_modes2, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,121),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(has_sled~.)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Variance is greater in the motorized groups, and the end-of-year and mid-March peaks are more pronounced there.
# Negative-binomial GAM: additive modality effect plus a smooth seasonal
# trend over the shifted day-of-year used in the plots above (Dec 1 -> 0).
# NOTE(review): day-of-year is circular, so a cyclic basis (bs = "cc") is
# the usual choice for a full-year smooth — presumably unnecessary here if
# observations span only the Dec–Apr season window; confirm the date range.
gam6 <- gam(user.count ~ modality+s((yday(date)+30)%%365), family = nb(), data=all_locations)
summary(gam6)
##
## Family: Negative Binomial(1.422)
## Link function: log
##
## Formula:
## user.count ~ modality + s((yday(date) + 30)%%365)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.36057 0.04470 52.81 <2e-16 ***
## modalityMechanized -1.10841 0.06674 -16.61 <2e-16 ***
## modalityMotorized 1.55441 0.06156 25.25 <2e-16 ***
## modalityNon.motorized 2.28642 0.06133 37.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s((yday(date) + 30)%%365) 8.184 8.818 390 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.513 Deviance explained = 64.2%
## -REML = 6632.1 Scale est. = 1 n = 1620
# Restrict to March–April to look for late-season / year-over-year effects.
march_on <- all_users[month(all_users$date)%in%3:4,]
# Same season-day axis as the earlier plots. Fixes: the original wrote
# yday(date)+30%%365, which (because %% binds tighter than +) reduced to
# yday(date)+30 — harmless for March–April values but inconsistent with the
# other plots; aes() now references the bare column `date` rather than
# march_on$date (referencing df$col inside aes() is fragile); the Apr 1
# break uses the non-leap anchor 121 for consistency; and facet_grid(...~1)
# is replaced with the idiomatic has_sled ~ . form.
ggplot(march_on, aes((yday(date)+30)%%365, user.count, color=year))+
geom_point()+geom_smooth()+
scale_x_continuous(breaks = c(90,121),
labels = c("Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(has_sled~.)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
No clear year-over-year pattern in late-season (March–April) use is apparent here.
# Flag "high risk" days: the three elevation-band ratings (above / near /
# below treeline) summed together exceed 9.
# NOTE(review): as.numeric() on a factor returns the integer level codes,
# not the labels' numeric values — this assumes each rating_* factor has
# levels ordered from lowest to highest danger; confirm the level ordering.
high_risk <- as.numeric(winter_travel$rating_above)+
as.numeric(winter_travel$rating_near)+
as.numeric(winter_travel$rating_below) > 9
# NB model of all-user counts on the high-risk flag. exp(-1.20) ~= 0.30,
# i.e. roughly a 70% reduction in expected use on high-risk days.
summary(glm.nb(user.count~high_risk, data=winter_travel))
##
## Call:
## glm.nb(formula = user.count ~ high_risk, data = winter_travel,
## init.theta = 0.1912528575, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2092 -1.2092 -0.6758 -0.0838 4.4584
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.14641 0.02484 86.407 <2e-16 ***
## high_riskTRUE -1.20154 0.14481 -8.297 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1913) family taken to be 1)
##
## Null deviance: 8265.4 on 8938 degrees of freedom
## Residual deviance: 8215.0 on 8937 degrees of freedom
## (1709 observations deleted due to missingness)
## AIC: 45387
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.19125
## Std. Err.: 0.00345
##
## 2 x log-likelihood: -45381.15500
On high-risk days, usage is about 30% (exp(−1.20) ≈ 0.30) of usage on days that are not high risk.